package com.shz.appletsapi.service.webmagic.jscj;

import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.shz.appletsapi.model.po.Coin;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;


/**
 * WebMagic {@link PageProcessor} that takes the HTML of each downloaded page, removes every
 * section running from a {@code <div class="new-nav">} opening tag to the next
 * {@code <div class="m120"></div>}, and publishes the remaining markup as a result field
 * named {@link #FIELD_CLEANED_HTML}.
 *
 * <p>Usage example (crawl the jinse.com front page with a single thread):
 * <pre>{@code
 * Spider.create(new OnjscjPageProcessor())
 *         .addUrl("https://www.jinse.com/")   // seed URL to start crawling from
 *         .thread(1)                          // number of crawler threads
 *         .run();                             // start the crawler
 * }</pre>
 */
public class OnjscjPageProcessor implements PageProcessor {

    /** Result-field key under which {@link #process(Page)} stores the cleaned HTML. */
    public static final String FIELD_CLEANED_HTML = "cleanedHtml";

    private static final Logger LOG = Logger.getLogger(OnjscjPageProcessor.class.getName());

    /**
     * Matches the block to strip: from {@code <div class="new-nav">} up to and including the
     * first following {@code <div class="m120"></div>}. {@code [\s\S]} lets the match span
     * line breaks; the quantifier is reluctant so each match stops at the nearest end marker.
     * Compiled once here instead of on every processed page.
     */
    private static final Pattern NAV_SECTION = Pattern.compile(
            "<div class=\"new-nav\">[\\s\\S]*?<div class=\"m120\"></div>",
            Pattern.CASE_INSENSITIVE);

    /** Crawl settings: 3 retries, 1000 ms sleep between pages, 10 000 ms download timeout. */
    private final Site site = Site
            .me()
            .setRetryTimes(3)
            .setSleepTime(1000)
            .setTimeOut(10 * 1000);

    // Regex that matches any single-line string containing no '#'.
    // NOTE(review): not referenced anywhere in this class - confirm whether it is still needed.
    private static String URL = "^((?!#).)*$";

    // NOTE(review): not referenced anywhere in this class - confirm whether it is still needed.
    private Boolean flag = Boolean.TRUE;

    /**
     * Strips the navigation section from the page's HTML and stores the result in the page's
     * result items under {@link #FIELD_CLEANED_HTML}.
     *
     * <p>If the page yields no HTML string, nothing is published.
     *
     * @param page the downloaded page handed over by the crawler
     */
    @Override
    public void process(Page page) {
        String rawHtml = page.getHtml().get();
        if (rawHtml == null) {
            // Nothing to clean; avoid a NullPointerException in the matcher.
            return;
        }

        // Diagnostic dump of the raw page; only built when FINE logging is enabled so that
        // normal runs do not write every page's full HTML to the console.
        if (LOG.isLoggable(Level.FINE)) {
            LOG.fine("@@@@" + rawHtml);
        }

        page.putField(FIELD_CLEANED_HTML, stripNavSection(rawHtml));
    }

    /**
     * Removes every navigation section (see {@link #NAV_SECTION}) from the given markup.
     *
     * @param html non-null HTML text
     * @return {@code html} with all matching sections replaced by the empty string
     */
    static String stripNavSection(String html) {
        return NAV_SECTION.matcher(html).replaceAll("");
    }

    /**
     * @return the crawl settings used by this processor
     */
    @Override
    public Site getSite() {
        return site;
    }
}
